{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import gzip,sys,struct"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Gzip-compressed image/label files for the training split and the t10k split,\n",
    "# given as paths relative to the working directory.\n",
    "train_images, train_labels = (\n",
    "    \"dataset/train-images-idx3-ubyte.gz\",\n",
    "    \"dataset/train-labels-idx1-ubyte.gz\",\n",
    ")\n",
    "t10k_images, t10k_labels = (\n",
    "    \"dataset/t10k-images-idx3-ubyte.gz\",\n",
    "    \"dataset/t10k-labels-idx1-ubyte.gz\",\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "train labels >>>>>>>>>>>>>>>>>>\n",
      "magic: 2049 \n",
      "numberOfLabels: 60000\n",
      "[5, 0, 4, 1, 9, 2, 1, 3, 1, 4, 3, 5, 3, 6, 1, 7, 2, 8, 6, 9, 4, 0, 9, 1, 1, 2, 4, 3, 2, 7, 3, 8, 6, 9, 0, 5, 6, 0, 7, 6, 1, 8, 7, 9, 3, 9, 8, 5, 9, 3, 3, 0, 7, 4, 9, 8, 0, 9, 4, 1, 4, 4, 6, 0, 4, 5, 6, 1, 0, 0, 1, 7, 1, 6, 3, 0, 2, 1, 1, 7, 9, 0, 2, 6, 7, 8, 3, 9, 0, 4, 6, 7, 4, 6, 8, 0, 7, 8, 3, 1]\n",
      "test labels >>>>>>>>>>>>>>>>>>>>\n",
      "magic: 2049 \n",
      "numberOfLabels: 10000\n",
      "[7, 2, 1, 0, 4, 1, 4, 9, 5, 9, 0, 6, 9, 0, 1, 5, 9, 7, 3, 4, 9, 6, 6, 5, 4, 0, 7, 4, 0, 1, 3, 1, 3, 4, 7, 2, 7, 1, 2, 1, 1, 7, 4, 2, 3, 5, 1, 2, 4, 4, 6, 3, 5, 5, 6, 0, 4, 1, 9, 5, 7, 8, 9, 3, 7, 4, 6, 4, 3, 0, 7, 0, 2, 9, 1, 7, 3, 2, 9, 7, 7, 6, 2, 7, 8, 4, 7, 3, 6, 1, 3, 6, 9, 3, 1, 4, 1, 7, 6, 9]\n"
     ]
    }
   ],
   "source": [
    "def read_labels(filename):\n",
    "    \"\"\"Read every label from a gzip-compressed label file.\n",
    "\n",
    "    The decompressed data starts with two big-endian unsigned 32-bit integers\n",
    "    (a magic number and the label count) followed by one unsigned byte per\n",
    "    label. The header values are printed as a side effect.\n",
    "\n",
    "    Args:\n",
    "        filename: Path of the gzip-compressed label file.\n",
    "\n",
    "    Returns:\n",
    "        A list of ints, one per label, in file order.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: If the file holds fewer label bytes than the header declares.\n",
    "    \"\"\"\n",
    "    header_size = struct.calcsize('>II')\n",
    "    # The with-block closes the stream, so no explicit close() is needed.\n",
    "    with gzip.open(filename) as bytestream:\n",
    "        buf = bytestream.read()\n",
    "\n",
    "    # Unpack the header fields.\n",
    "    magic, numberOfLabels = struct.unpack_from('>II', buf, 0)\n",
    "    print('magic:', magic, '\\n'\n",
    "          'numberOfLabels:', numberOfLabels)\n",
    "\n",
    "    # Labels start right after the header, so the payload ends at\n",
    "    # header_size + numberOfLabels. Stopping at offset numberOfLabels (as the\n",
    "    # previous loop did) silently dropped the last header_size labels.\n",
    "    payload = buf[header_size:header_size + numberOfLabels]\n",
    "    if len(payload) != numberOfLabels:\n",
    "        raise ValueError('%s: expected %d label bytes, found %d'\n",
    "                         % (filename, numberOfLabels, len(payload)))\n",
    "    # Iterating over bytes yields ints, matching the previous per-byte unpack.\n",
    "    return list(payload)\n",
    "\n",
    "# Training split: parse the label file and preview the first 100 labels.\n",
    "print('train labels >>>>>>>>>>>>>>>>>>')\n",
    "train_label=read_labels(train_labels)\n",
    "print(train_label[:100])\n",
    "\n",
    "# t10k split: same parse-and-preview steps on the other label file.\n",
    "print('test labels >>>>>>>>>>>>>>>>>>>>')\n",
    "t10k_label=read_labels(t10k_labels)\n",
    "print(t10k_label[:100])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "magic: 2051 \n",
      "numberOfImages: 60000 \n",
      "rows: 28 \n",
      "col: 28\n",
      "label: 5\n",
      "0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   \n",
      "\n",
      "0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   \n",
      "\n",
      "0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   \n",
      "\n",
      "0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   \n",
      "\n",
      "0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   \n",
      "\n",
      "0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   126 136 175 0   166 255 247 127 0   0   0   0   \n",
      "\n",
      "0   0   0   0   0   0   0   0   0   0   94  154 170 253 253 253 253 253 225 172 253 242 195 64  0   0   0   0   \n",
      "\n",
      "0   0   0   0   0   0   0   0   238 253 253 253 253 253 253 253 253 251 93  82  82  56  0   0   0   0   0   0   \n",
      "\n",
      "0   0   0   0   0   0   0   0   219 253 253 253 253 253 198 182 247 241 0   0   0   0   0   0   0   0   0   0   \n",
      "\n",
      "0   0   0   0   0   0   0   0   80  156 107 253 253 205 0   0   0   154 0   0   0   0   0   0   0   0   0   0   \n",
      "\n",
      "0   0   0   0   0   0   0   0   0   0   0   154 253 90  0   0   0   0   0   0   0   0   0   0   0   0   0   0   \n",
      "\n",
      "0   0   0   0   0   0   0   0   0   0   0   139 253 190 0   0   0   0   0   0   0   0   0   0   0   0   0   0   \n",
      "\n",
      "0   0   0   0   0   0   0   0   0   0   0   0   190 253 70  0   0   0   0   0   0   0   0   0   0   0   0   0   \n",
      "\n",
      "0   0   0   0   0   0   0   0   0   0   0   0   0   241 225 160 108 0   0   0   0   0   0   0   0   0   0   0   \n",
      "\n",
      "0   0   0   0   0   0   0   0   0   0   0   0   0   81  240 253 253 119 0   0   0   0   0   0   0   0   0   0   \n",
      "\n",
      "0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   186 253 253 150 0   0   0   0   0   0   0   0   0   \n",
      "\n",
      "0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   93  252 253 187 0   0   0   0   0   0   0   0   \n",
      "\n",
      "0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   249 253 249 64  0   0   0   0   0   0   0   \n",
      "\n",
      "0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   130 183 253 253 207 0   0   0   0   0   0   0   0   \n",
      "\n",
      "0   0   0   0   0   0   0   0   0   0   0   0   0   148 229 253 253 253 250 182 0   0   0   0   0   0   0   0   \n",
      "\n",
      "0   0   0   0   0   0   0   0   0   0   0   114 221 253 253 253 253 201 78  0   0   0   0   0   0   0   0   0   \n",
      "\n",
      "0   0   0   0   0   0   0   0   0   66  213 253 253 253 253 198 81  0   0   0   0   0   0   0   0   0   0   0   \n",
      "\n",
      "0   0   0   0   0   0   0   171 219 253 253 253 253 195 80  0   0   0   0   0   0   0   0   0   0   0   0   0   \n",
      "\n",
      "0   0   0   0   55  172 226 253 253 253 253 244 133 0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   \n",
      "\n",
      "0   0   0   0   136 253 253 253 212 135 132 0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   \n",
      "\n",
      "0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   \n",
      "\n",
      "0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   \n",
      "\n",
      "0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   \n",
      "\n"
     ]
    }
   ],
   "source": [
    "def read_img(filename, labels, count=1):\n",
    "    \"\"\"Print the leading images of a gzip-compressed image file as text grids.\n",
    "\n",
    "    The decompressed data starts with four big-endian unsigned 32-bit integers\n",
    "    (magic number, image count, rows, columns) followed by one unsigned byte\n",
    "    per pixel. The header is printed first; each image is then printed as its\n",
    "    label followed by one text line per pixel row, with a blank line after\n",
    "    every row.\n",
    "\n",
    "    Args:\n",
    "        filename: Path of the gzip-compressed image file.\n",
    "        labels: Sequence indexed in step with the images; labels[i] is printed\n",
    "            before image i.\n",
    "        count: Number of leading images to print. Defaults to 1, the value\n",
    "            that used to be hard-coded. Capped at the image count declared\n",
    "            in the header.\n",
    "\n",
    "    Returns:\n",
    "        None. All output goes to sys.stdout.\n",
    "    \"\"\"\n",
    "    # The with-block closes the stream, so no explicit close() is needed.\n",
    "    with gzip.open(filename) as bytestream:\n",
    "        buf = bytestream.read()\n",
    "\n",
    "    magic, numberOfImages, rows, col = struct.unpack_from('>IIII', buf, 0)\n",
    "    print('magic:', magic, '\\n'\n",
    "          'numberOfImages:', numberOfImages, '\\n'\n",
    "          'rows:', rows, '\\n'\n",
    "          'col:', col)\n",
    "\n",
    "    # Pixel data begins immediately after the four header integers.\n",
    "    index = struct.calcsize('>IIII')\n",
    "    row_format = '%dB' % col\n",
    "    for i in range(min(count, numberOfImages)):\n",
    "        print('label:', labels[i])\n",
    "        for _ in range(rows):\n",
    "            # Decode one whole pixel row per call instead of byte by byte.\n",
    "            pixels = struct.unpack_from(row_format, buf, index)\n",
    "            index += col\n",
    "            # Values of 50 or less are rendered as 0. ljust(4) keeps every\n",
    "            # cell four characters wide; a value of exactly 100 used to be\n",
    "            # written five characters wide and shifted the rest of its row.\n",
    "            cells = [str(p if p > 50 else 0).ljust(4) for p in pixels]\n",
    "            sys.stdout.write(''.join(cells))\n",
    "            sys.stdout.write('\\n\\n')\n",
    "        sys.stdout.flush()\n",
    "    return\n",
    "\n",
    "# Print the first training image as a text grid, preceded by its label.\n",
    "read_img(train_images,train_label)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[0. 0. 0. ... 0. 0. 0.]\n",
      " [1. 0. 0. ... 0. 0. 0.]\n",
      " [0. 0. 0. ... 0. 0. 0.]\n",
      " ...\n",
      " [0. 0. 0. ... 0. 0. 0.]\n",
      " [0. 0. 0. ... 0. 0. 0.]\n",
      " [0. 0. 0. ... 0. 0. 0.]]\n",
      "<class 'numpy.ndarray'>\n",
      "[[0. 0. 0. ... 0. 0. 0.]\n",
      " [0. 0. 0. ... 0. 0. 0.]\n",
      " [0. 0. 0. ... 0. 0. 0.]\n",
      " ...\n",
      " [0. 0. 0. ... 0. 0. 0.]\n",
      " [0. 0. 0. ... 0. 0. 0.]\n",
      " [0. 0. 0. ... 0. 0. 0.]]\n"
     ]
    }
   ],
   "source": [
    "import numpy as np\n",
    "def read32(bytestream):\n",
    "    \"\"\"Read four bytes from the stream and decode them as one big-endian int32.\"\"\"\n",
    "    # The data is stored big-endian, hence the explicit '>' in the dtype string.\n",
    "    raw = bytestream.read(4)\n",
    "    values = np.frombuffer(raw, dtype='>i4')\n",
    "    return values[0]\n",
    "\n",
    "def read_label2(filename, num_classes=10):\n",
    "    \"\"\"Load a gzip-compressed label file as a one-hot matrix.\n",
    "\n",
    "    The header is read with read32 (magic number, then label count); the\n",
    "    remaining bytes are interpreted as one uint8 label each.\n",
    "\n",
    "    Args:\n",
    "        filename: Path of the gzip-compressed label file.\n",
    "        num_classes: Width of each one-hot row. Defaults to 10, the width\n",
    "            that used to be hard-coded.\n",
    "\n",
    "    Returns:\n",
    "        An array of shape (numberOfLabels, num_classes), created with\n",
    "        np.zeros, in which row i holds a 1 at column labels[i].\n",
    "\n",
    "    Raises:\n",
    "        ValueError: If the file holds fewer label bytes than the header declares.\n",
    "        IndexError: If a label value is not below num_classes.\n",
    "    \"\"\"\n",
    "    # The with-block closes the stream, so no explicit close() is needed.\n",
    "    with gzip.open(filename) as bytestream:\n",
    "        read32(bytestream)  # skip the magic number; its value is not checked\n",
    "        numberOfLabels = int(read32(bytestream))\n",
    "        raw = bytestream.read(numberOfLabels)\n",
    "    if len(raw) != numberOfLabels:\n",
    "        raise ValueError('%s: expected %d label bytes, found %d'\n",
    "                         % (filename, numberOfLabels, len(raw)))\n",
    "\n",
    "    labels = np.frombuffer(raw, np.uint8)\n",
    "    data = np.zeros((numberOfLabels, num_classes))\n",
    "    # Set one entry per row in a single indexed assignment. The previous loop\n",
    "    # stopped after 100 rows, leaving every later row all zeros (and failing\n",
    "    # outright on files with fewer than 100 labels).\n",
    "    data[np.arange(numberOfLabels), labels] = 1\n",
    "    return data\n",
    "\n",
    "def read_img2(filename):\n",
    "    \"\"\"Load a gzip-compressed image file as a scaled float32 matrix.\n",
    "\n",
    "    The header is read with read32 (magic number, image count, rows,\n",
    "    columns); the remaining bytes are interpreted as one uint8 pixel each.\n",
    "\n",
    "    Args:\n",
    "        filename: Path of the gzip-compressed image file.\n",
    "\n",
    "    Returns:\n",
    "        A float32 array of shape (numberOfImages, rows * columns) holding the\n",
    "        pixel bytes multiplied by 1/255.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: If the file holds fewer pixel bytes than the header declares.\n",
    "    \"\"\"\n",
    "    # Decompress the file into a byte stream; the with-block closes it.\n",
    "    with gzip.open(filename) as bytestream:\n",
    "        read32(bytestream)  # skip the magic number; its value is not checked\n",
    "        # Convert to Python ints so the size product below cannot overflow\n",
    "        # the 32-bit integers returned by read32.\n",
    "        numberOfImages = int(read32(bytestream))\n",
    "        rows = int(read32(bytestream))\n",
    "        columns = int(read32(bytestream))\n",
    "        pixel_count = numberOfImages * rows * columns\n",
    "        raw = bytestream.read(pixel_count)\n",
    "    if len(raw) != pixel_count:\n",
    "        raise ValueError('%s: expected %d pixel bytes, found %d'\n",
    "                         % (filename, pixel_count, len(raw)))\n",
    "\n",
    "    # reshape returns a new view instead of assigning to .shape in place.\n",
    "    images = np.frombuffer(raw, np.uint8).reshape(numberOfImages, rows * columns)\n",
    "    images = images.astype(np.float32)\n",
    "    images = np.multiply(images, 1.0 / 255.0)\n",
    "    return images\n",
    "\n",
    "# Build the label matrix from the training label file and print it.\n",
    "trainlabel=read_label2(train_labels)\n",
    "print(trainlabel)\n",
    "\n",
    "# Load the training images as a scaled float matrix and print it.\n",
    "trainimg=read_img2(train_images)\n",
    "print(trainimg)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
